import requests
from bs4 import BeautifulSoup
import csv
import time

# Base URL of the Anjuke house-price listing site; one page per city/year
# is built from it as f"{base_url}/ty{year}" (ty = Taiyuan).
base_url = 'https://www.anjuke.com/fangjia'
# Request headers: a captured session Cookie plus a mobile User-Agent so the
# site serves the page without an anti-bot challenge.
# NOTE(review): the Cookie is session-specific and will expire — refresh it
# from a real browser session before rerunning.
headers_S = {
    "Cookie": "aQQ_ajkguid=9668D2EF-92B3-D7CC-358A-1E93E5984B02; id58=CrIgxGaOPDOnenVAX/MpAg==; xxzlclientid=96e14e60-1fcc-4612-97c6-1720597555983; xxzlxxid=pfmxgTFi7KlezDNhhJx461TzxEh+CEhfBQ6IrZf8KMT48N68NHkqKY3LsAYGHvBI26cR; ajk-appVersion=; fzq_h=0d9b5ba2be72c983bf942b6a4bf7e004_1720597560505_90917d7e2bf24893bb1d7aeb8159d033_1897592920; sessid=6D4F2A70-9095-85F4-B5BC-9EDF39EAD229; obtain_by=2; twe=2; ctid=38; xxzl_cid=41f355ef7dc348b99992fc02677ddc3a; xxzl_deviceid=LfVDzgVnZTN6QwAZ6bZECT+CxPRlqdeZOZg/bvqezbA2oHbr1Qhj7O1oOeB/60Gq; xxzlbbid=pfmbM3wxMDM0NnwxLjkuMHwxNzIwNjgxNzExNzUwMzE3NjQ0fFBCbTNmUVF2S0MxYkxQemp6ZnZQSWhMRWxVbVIwNDB3RXpIT1hkbm9yNmM9fDM1M2IwYWQ3NGY0OTQzYTNkNjU0Y2EyMzUxZjU0N2E2XzE3MjA2ODE3MTEyMDFfY2NkNGQ1ODUwZWU5NDkxZjllOGVhNGI0ZDQzYzk4M2NfMzA4MzM5NzQ3NXxkNjU1M2M1NzgzZDZjYjE2YThmZDA0ZTBkOGFmZmE2ZF8xNzIwNjgxNzExMzQ3XzI1NQ==; xxzl_cid=41f355ef7dc348b99992fc02677ddc3a; xxzl_deviceid=LfVDzgVnZTN6QwAZ6bZECT+CxPRlqdeZOZg/bvqezbA2oHbr1Qhj7O1oOeB/60Gq",
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Mobile Safari/537.36 Edg/126.0.0.0"
}
# Mutable module-level cursor: the year currently being scraped. Read by
# scrape_page() and incremented by the driver loop at the bottom of the file.
year = 2012

def scrape_page(url, writer, end_year=2025):
    """Scrape one year's price table from `url` and append rows to the CSV.

    Relies on two module-level globals: ``year`` (the year currently being
    collected, used for the stop check and the progress message) and
    ``headers_S`` (request headers carrying the session cookie).

    Args:
        url: Full page URL for one year, e.g. f"{base_url}/ty2012".
        writer: An open ``csv.writer``; one [date, price] row is written
            per table entry found on the page.
        end_year: Exclusive upper bound — once the global ``year`` reaches
            this value, return False without issuing a request. Defaults
            to 2025 to preserve the original behavior.

    Returns:
        False when the global ``year`` has reached ``end_year`` (signals the
        caller to stop looping); True otherwise, including on HTTP or
        network errors, so one bad year does not abort the whole run.
    """
    if year == end_year:
        return False
    print('正在收集第%d年的数据' % year)
    try:
        # timeout prevents the collection loop from hanging forever on a
        # stalled connection (the original call had no timeout at all)
        response = requests.get(url, headers=headers_S, timeout=30)
    except requests.RequestException as exc:
        # best-effort: report and skip this year rather than crash mid-run
        print('request failed for %s: %s' % (url, exc))
        return True
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        # each matched div is one table row holding (date, price) cells
        items = soup.select('#__next div div:nth-child(3) div:nth-child(2) div.table-content div div')
        for item in items:
            # select each cell once (the original queried every cell twice)
            date_cell = item.select_one('div:nth-child(1)')
            price_cell = item.select_one('div:nth-child(2)')
            date = date_cell.text.strip() if date_cell else 'N/A'
            price = price_cell.text.strip() if price_cell else 'N/A'
            writer.writerow([date, price])
    else:
        # non-200 was silently ignored before; at least leave a trace
        print('unexpected status %d for %s' % (response.status_code, url))
    return True

# Driver: walk the years one page at a time, appending rows to the CSV
# until scrape_page() reports that the final year has been reached.
with open('HousePrice_ty.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['DATE', 'PRICE'])
    collecting = True
    while collecting:
        # Start collecting this year's page
        collecting = scrape_page(f"{base_url}/ty{year}", writer)
        if collecting:
            year += 1
            # polite delay between requests to avoid hammering the site
            time.sleep(5)
    print("已全部收集完毕！")